X-Git-Url: http://git.cyclocoop.org//%22http:/%22.attribut_html%28%24lesurls%5B%24numero%5D%29.%22/%22?a=blobdiff_plain;f=languages%2FLanguageConverter.php;h=f611358d94c09b5e2faa3d91b05b297cfccb397e;hb=1ce52c41fcef3cb6fc8dba10ad3a6d570373d8bd;hp=213778682e41cf62f68693b9883bbc92770f5074;hpb=d00939f76d573ef1c57bdc0f80fe73f731fd390b;p=lhc%2Fweb%2Fwiklou.git diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 213778682e..f611358d94 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -20,6 +20,8 @@ */ use MediaWiki\MediaWikiServices; +use MediaWiki\Logger\LoggerFactory; + /** * Base class for language conversion. * @ingroup Language @@ -37,6 +39,7 @@ class LanguageConverter { */ static public $languagesWithVariants = [ 'en', + 'crh', 'gan', 'iu', 'kk', @@ -160,6 +163,8 @@ class LanguageConverter { $req = $this->getURLVariant(); + Hooks::run( 'GetLangPreferredVariant', [ &$req ] ); + if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) { $req = $this->getUserVariant(); } elseif ( !$req ) { @@ -339,7 +344,6 @@ class LanguageConverter { * @return string The converted text */ public function autoConvert( $text, $toVariant = false ) { - $this->loadTables(); if ( !$toVariant ) { @@ -352,26 +356,34 @@ class LanguageConverter { if ( $this->guessVariant( $text, $toVariant ) ) { return $text; } - /* we convert everything except: - * 1. HTML markups (anything between < and >) - * 2. HTML entities - * 3. placeholders created by the parser - */ - $marker = '|' . Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+'; + 1. HTML markups (anything between < and >) + 2. HTML entities + 3. placeholders created by the parser + IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404). + Minimize use of backtracking where possible. + */ + $marker = '|' . Parser::MARKER_PREFIX . 
'[^\x7f]++\x7f'; // this one is needed when the text is inside an HTML markup - $htmlfix = '|<[^>]+$|^[^<>]*>'; + $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>'; + + // Optimize for the common case where these tags have + // few or no children. Thus try and possessively get as much as + // possible, and only engage in backtracking when we hit a '<'. // disable convert to variants between <code> tags - $codefix = '<code>.+?<\/code>|'; + $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|'; // disable conversion of